----------------------------------------------------------------------------------
      name:  <unnamed>
       log:  C:\research\china\decentralization\restat_data\tabdata\dofiles\tab_da
> ta\tabdata5-2empcen.log
  log type:  text
 opened on:  21 Jul 2016, 11:56:08

. set linesize 255

. 
. *****************************************************
. * CENSUS DATA EMPLOYMENT COUNT BY CATEGORY
. *****************************************************
. /*
> PURPOSE: count employment by year x industry for 1995 and 2008.
> 
> CHANGES:
> - 8/27: new industry definitions.
> - 9/3: organize a bit.
> */
. *****************************************************
. * Date created: 8/15/2013 by Ying Chen
. * Date updated: 9/3/2013
. *****************************************************
. * Files in:
. global cns95 "..\..\data\empcensus\source\cns95.dta"

. global cns08 "..\..\data\empcensus\source\cns08.dta"

. global ciccorr "..\..\data\empcensus\generated\cic_correspondence.dta"

. * Files out:
. global out2 "..\..\data\empcensus\generated\cns_emp_count_newind.dta"

. *****************************************************
. 
. * 1995
. use $cns95, clear

. drop _me

. gen emp=gongren_he_xuetu_b
(16,224 missing values generated)

. gen cs02=cic

. merge m:1 cs02 using $ciccorr

    Result                           # of obs.
    -----------------------------------------
    not matched                        47,362
        from master                    47,361  (_merge==1)
        from using                          1  (_merge==2)

    matched                           434,544  (_merge==3)
    -----------------------------------------

. drop if _me==2
(1 observation deleted)

. replace cic=cs03 if _me==3
(304,537 real changes made)

. drop _me

. gen indCode4=cic

. gen indCode2=substr(cic,1,2)

. gen indCode3=substr(cic,1,3)

. destring indCode*, replace
indCode4 has all characters numeric; replaced as int
indCode2 has all characters numeric; replaced as byte
indCode3 has all characters numeric; replaced as int

. keep if (indCode2>=6 & indCode2<=46)
(0 observations deleted)

. 
. * Industry categories
. gen food=(indCode2>=13 & indCode2<=16)

. gen trad=((indCode2>=20 & indCode2<=24) | indCode2==42)

. gen txtlaprl=(indCode2>=17 & indCode2<=19)

. gen plstc=(indCode2>=25 & indCode2<=30)

. gen nonmtl=(indCode2==31)

. gen metals=(indCode2>=32 & indCode2<=34)

. gen metal=(indCode2==32 | indCode2==33)

. gen metalpdct=(indCode2==34)

. gen machinery=(indCode2>=35 & indCode2<=37)

. gen cptNinst=(indCode2>=39 & indCode2<=41)

. gen ht=(indCode3==368 | indCode3==376 | indCode2==40 | indCode3==411 | indCode3==412 | indCode3==414 | indCode3==419)

. gen nonht=((indCode2>=35 & indCode2<=39 & ht==0) | (indCode3==413 | indCode3==415))

. gen aprl=(indCode2==18)

. gen eduspt=(indCode2==24)

. gen geq=(indCode2==35)

. gen seq=(indCode2==36)

. gen trans=(indCode2==37)

. gen ptran=(indCode3==371|indCode3==372|indCode3==373)

. gen eeq=(indCode2==39)

. gen comm=(indCode2==40)

. gen inst=(indCode2==41)

. gen art=(indCode2==42)

. gen lghttrad=(indCode2==21 | indCode2==23 | indCode2==24)

. gen hvytrad=(indCode2==20 | indCode2==22)

. gen plstcNrbbr=(indCode2==29 | indCode2==30)

. gen allhvy=(indCode2==20 | indCode2==22 | (indCode2>=13 & indCode2<=16) | (indCode2>=25 & indCode2<=28) | (indCode2>=31 & indCode2<=33))

. gen allmdm=(indCode2==21 | indCode2==23 | indCode2==24 | indCode2==29 | indCode2==30 | indCode2==34)

. 
. * count employment for each industry
. gen year=1995

. bys year unit_code_common: egen unit_emp=total(emp)

. foreach ind in food trad txtlaprl plstc nonmtl metals metal metalpdct machinery cptNinst ht nonht aprl eduspt geq seq trans ptran eeq comm inst art lghttrad hvytrad plstcNrbbr allhvy allmdm {
  2.         bys year unit_code_common: egen `ind'_emp=total(emp) if `ind'==1
  3.         gsort year unit_code_common -`ind'_emp
  4.         by year unit_code_common: replace `ind'_emp=`ind'_emp[1] if `ind'_emp==.
  5.         replace `ind'_emp=0 if `ind'_emp==.
  6.         }
(423602 missing values generated)
(423351 real changes made)
(251 real changes made)
(411408 missing values generated)
(411180 real changes made)
(228 real changes made)
(429018 missing values generated)
(424400 real changes made)
(4,618 real changes made)
(422911 missing values generated)
(421374 real changes made)
(1,537 real changes made)
(424210 missing values generated)
(423532 real changes made)
(678 real changes made)
(444447 missing values generated)
(440290 real changes made)
(4,157 real changes made)
(469774 missing values generated)
(433775 real changes made)
(35,999 real changes made)
(456578 missing values generated)
(444047 real changes made)
(12,531 real changes made)
(413749 missing values generated)
(410884 real changes made)
(2,865 real changes made)
(448738 missing values generated)
(421429 real changes made)
(27,309 real changes made)
(468921 missing values generated)
(351331 real changes made)
(117,590 real changes made)
(393574 missing values generated)
(391315 real changes made)
(2,259 real changes made)
(464703 missing values generated)
(437898 real changes made)
(26,805 real changes made)
(477686 missing values generated)
(315293 real changes made)
(162,393 real changes made)
(451521 missing values generated)
(430143 real changes made)
(21,378 real changes made)
(464129 missing values generated)
(442368 real changes made)
(21,761 real changes made)
(461909 missing values generated)
(439886 real changes made)
(22,023 real changes made)
(466266 missing values generated)
(434641 real changes made)
(31,625 real changes made)
(461875 missing values generated)
(425075 real changes made)
(36,800 real changes made)
(474221 missing values generated)
(308685 real changes made)
(165,536 real changes made)
(476452 missing values generated)
(290283 real changes made)
(186,169 real changes made)
(467476 missing values generated)
(441654 real changes made)
(25,822 real changes made)
(453773 missing values generated)
(452524 real changes made)
(1,249 real changes made)
(453969 missing values generated)
(448941 real changes made)
(5,028 real changes made)
(458934 missing values generated)
(442750 real changes made)
(16,184 real changes made)
(289817 missing values generated)
(289781 real changes made)
(36 real changes made)
(405475 missing values generated)
(405247 real changes made)
(228 real changes made)

. by year unit_code_common: keep if _n==1
(479,587 observations deleted)

. keep year unit_code_common *_emp

. tempfile 95

. save `95'
file C:\Users\NATE~1.BAU\AppData\Local\Temp\ST_01000001.tmp saved

. 
. 
. 
. * 2008
. use indus_2digcode industrycode labor year unit_code_common using $cns08, clear
file ..\..\data\empcensus\source\cns08.dta not found
r(601);

end of do-file
r(601);

end of do-file

r(601);

. clear

. do master-tab.do

. do tabdata1.do

. /*** tabdata1.do
> 
> This do-file merges city proper level tabular data to create one data set.
> Included is data on aggregates of urban districts only.  Data on county cities
> is incorporated into the us123.dta data set.
> 
> ***/
. 
. clear

. set more off

. capture log close
